# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)

# === Figure 2(a). Aggregate mutual fund ownership
rm(list = ls())

# Input is plotted as one line per `source`, with `value` against `date`.
data <- readRDS("input_data/mutual_fund_industry_size.RDS")

ggplot(data, aes(x = date, y = value, color = source)) +
  geom_line(lwd = 1) +
  # vertical reference line at 2002-06-01, drawn on top of the series
  geom_vline(xintercept = as.Date("2002-06-01"), linetype = 3, colour = 2) +
  # tick labels as whole percentages; the view is zoomed to 0-35% without
  # dropping any observations (coord_cartesian only changes the visible range)
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(0, 0.35)) +
  labs(x = NULL, y = "% Market capitalization") +
  theme_classic() +
  # untitled legend placed inside the panel, towards the lower right
  theme(legend.title = element_blank(), legend.position = c(0.8, 0.2))

# === Figure 2(b). Number of mutual funds
rm(list = ls())

# get latest rating by fund in each year
data <- readRDS("input_data/monthly_fund_data.RDS")
# no-op for a valid data.table; data.table recommends it for tables loaded
# from disk before using `:=` on them
setDT(data)
data[, yyyy := floor(yyyymm / 100)]

# Sort chronologically within fund so that last() below really is the rating
# of the latest month of the year, whatever row order the file was saved in.
# The sort is stable, so input that is already ordered gives the same result.
setorder(data, fundno, yyyymm)
data <- data[, list(rating = data.table::last(rating)), list(yyyy, fundno)]

# count the number of funds with each rating, and plot
data <- data[, list(obs = .N), list(yyyy, rating)]

# "1 star" / "n stars"; a missing rating stays NA rather than becoming the
# literal string "NANA" that paste0() would otherwise build from NA inputs
data[, rating_label := ifelse(
  is.na(rating),
  NA_character_,
  paste0(rating, ifelse(rating == 1, " star", " stars"))
)]

# stacked bars per year; legend/stack order runs from highest to lowest rating
ggplot(data, aes(x = yyyy, y = obs, fill = reorder(rating_label, -rating))) +
  geom_bar(position = "stack", stat = "identity") +
  theme_classic() +
  theme(legend.title = element_blank()) +
  labs(x = NULL, y = "Number of funds")
